# -*- coding: utf-8 -*-
import requests
import json

import time

import sys

from libs import putian_coll, hospital_coll
from bs4 import BeautifulSoup
from geojson import Point

# Index passed to libs.get_baidu_api to select the Baidu API key in use.
# get_address_geo increments it when the geocoder answers with status >= 3
# and resets it to 0 once get_baidu_api returns False.
api_number = 0


# Fetch the list of Putian-affiliated hospitals
def get_putians():
    """Return the hospital names from the open-power-workgroup hospital list.

    The list is cached in ``putian_coll``: if the collection already holds a
    document, its ``'list'`` field is returned. Otherwise the JSON resource is
    downloaded from GitHub, flattened into a single list of names, stored in
    the collection and returned.

    Returns:
        list: hospital names (the keys of every top-level group in the
        downloaded JSON).

    Raises:
        requests.RequestException: if the download fails, times out or
            answers with an HTTP error status.
    """
    # A single find_one() both checks the cache and fetches it; this avoids
    # Collection.count(), which newer PyMongo releases no longer provide.
    cached = putian_coll.find_one()
    if cached is not None:
        return list(cached['list'])

    response = requests.get(
        "https://raw.githubusercontent.com/open-power-workgroup/Hospital/master/resource/API_resource/hospital_list.json",
        timeout=10,
    )
    # Fail with a clear HTTP error instead of a JSON decode error on a 404 page.
    response.raise_for_status()
    putian = dict(json.loads(response.text))

    putians = []
    for group in putian.values():
        putians.extend(group.keys())

    putian_coll.insert_one({
        'list': putians
    })
    return putians


def get_province_list(soup):
    """Build a mapping of province name -> list of city names.

    Every ``h4`` element found in ``soup`` is taken as a province heading.
    The city links are read from the node two siblings after the heading;
    the text of each ``a`` inside that node is one city name.

    Args:
        soup: parsed HTML node exposing ``find_all``.

    Returns:
        dict: ``{province_name: [city_name, ...]}``.
    """
    provinces = {}
    for heading in soup.find_all('h4'):
        title = heading.get_text(strip=True)
        # Hop over the node directly after the heading to reach the one
        # that holds the city links.
        city_container = heading.next_sibling.next_sibling
        provinces[title] = [
            link.get_text(strip=True)
            for link in city_container.find_all('a')
        ]
    return provinces


def get_province_name(city_map, city_name):
    """Return the province whose city list contains ``city_name``.

    Args:
        city_map: mapping of province name -> collection of city names.
        city_name: city to look up.

    Returns:
        The first matching province key in iteration order, or ``None``
        when no province lists the city.
    """
    return next(
        (province for province, cities in city_map.items() if city_name in cities),
        None,
    )


def _get_hospital_detail_with_retry(url, name, max_attempts=5):
    """Call get_hospital_detail, retrying a bounded number of times on errors.

    Returns the non-empty detail dict, or False when the detail page yields
    nothing usable or every attempt raised.
    """
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            detail = get_hospital_detail(url, name)
        except Exception as exc:  # best-effort crawl: keep going on any page error
            last_error = exc
            if attempt < max_attempts:
                print("休息3s后重试")
                time.sleep(3)
        else:
            # An empty dict carries no data; treat it like an explicit False
            # so the caller skips the hospital instead of retrying forever.
            return detail if detail else False
    print("重试失败,跳过:", repr(last_error))
    return False


def get_hospitals():
    """Crawl yyk.99.com.cn and store every new hospital in ``hospital_coll``.

    Walks the city index page, then each city page, then each district block
    (``class='tablist'``) and each hospital link inside it. Hospitals whose
    name contains '卫生服务站' or '社区', that appear in the get_putians()
    list, or that are already stored are skipped. For the rest the detail
    page is fetched and a ``{'Name': ..., 'Info': ...}`` document is inserted,
    where ``Info`` is the detail dict extended with ``District``, ``City``
    and ``Province``.

    Raises:
        requests.RequestException: if the index page or a city page cannot
            be fetched. Errors on individual hospital pages are retried and
            then skipped.
    """
    # A set makes the per-hospital membership test O(1).
    putians = set(get_putians())

    base = "http://yyk.99.com.cn"
    index_page = requests.get(base + "/city.html", timeout=10).text
    content = BeautifulSoup(index_page, 'lxml').find(id='areacontent')

    province_list = get_province_list(content)

    for a in content.find_all('a'):
        city_name = a.get_text(strip=True)
        # The province only depends on the city, so resolve it once per city.
        province_name = get_province_name(province_list, city_name)

        city_page = requests.get(base + a['href'], timeout=10).text
        city_soup = BeautifulSoup(city_page, 'lxml')

        for div in city_soup.find_all(class_='tablist'):
            heading = div.find('h4')
            if heading is None:
                # No district heading to label the hospitals with; skip the block.
                continue
            district = heading.get_text(strip=True)
            paren_at = district.find('(')
            if paren_at >= 0:
                district = district[:paren_at]
            district = district.replace('更多>>', '')

            # The first two links of a block are skipped.
            # NOTE(review): presumably heading/"more" links rather than
            # hospitals - confirm against the page markup.
            for hospital_a in div.find_all('a')[2:]:
                hospital_name = hospital_a.get_text(strip=True)
                # Exclude community health stations and listed hospitals.
                if ('卫生服务站' in hospital_name or '社区' in hospital_name
                        or hospital_name in putians):
                    continue
                print(hospital_name)
                # Resumable crawl: skip anything already stored.
                if hospital_coll.find_one({
                    'Name': hospital_name
                }) is not None:
                    print("已经有了,跳过")
                    continue
                time.sleep(2)

                hospital_info = _get_hospital_detail_with_retry(
                    hospital_a['href'], hospital_name)
                if not hospital_info:
                    continue

                hospital_info['District'] = district
                hospital_info['City'] = city_name
                hospital_info['Province'] = province_name

                print("新增成功")
                hospital_coll.insert_one({
                    'Name': hospital_name,
                    'Info': hospital_info
                })


def get_hospital_detail(url, name):
    """Fetch a hospital detail page and extract its basic information.

    The ``li`` items inside the ``class='hpi_content'`` element are read by
    position: item 1 -> ``'Quality'``, item 2 -> ``'Level'``, item 4 ->
    ``'Address'`` (items 0, 3 and 5 are ignored). The stored value is the
    text after the first full-width colon in the first whitespace-separated
    token of the item. The address is additionally geocoded and stored under
    ``'AddressGeo'``.

    Args:
        url: URL of the hospital detail page.
        name: hospital name, used as the primary geocoder query.

    Returns:
        dict with the extracted fields, or ``False`` when the page is not
        served with HTTP 200, has no info box, yields no fields at all, has
        an empty address, or the address cannot be geocoded.

    Raises:
        requests.RequestException: if the page request fails or times out.
    """
    page = requests.get(url, timeout=10)
    if page.status_code != 200:
        return False
    soup = BeautifulSoup(page.text, 'lxml')
    content = soup.find(class_='hpi_content')
    if content is None:
        return False

    # Position of each wanted <li> inside the info box -> output key.
    field_by_index = {1: 'Quality', 2: 'Level', 4: 'Address'}

    hospital_info = dict()
    for idx, li in enumerate(content.find_all('li')):
        field = field_by_index.get(idx)
        if field is None:
            continue

        # Tolerate an empty item or one without the colon separator instead
        # of raising IndexError; such an item yields an empty value.
        tokens = li.get_text().split()
        parts = tokens[0].split('：') if tokens else []
        info = parts[1] if len(parts) > 1 else ''

        if field == 'Address':
            if info == '':
                return False
            address_geo = get_address_geo(name, info)
            if address_geo is False:
                return False
            hospital_info['Address'] = info
            hospital_info['AddressGeo'] = address_geo
        else:
            hospital_info[field] = info

    # An empty dict means nothing was extracted; report that as a failure so
    # callers that loop until they get a result do not spin forever.
    return hospital_info or False


def get_address_geo(address, address2):
    """Geocode a hospital through the Baidu geocoder API.

    ``address`` is queried first. If Baidu answers with a non-zero status,
    the query is repeated with ``address2`` after trimming it: everything
    from the first full-width '（' onwards is dropped, and the text is cut
    right after the first '号'. For that second answer, status 1-2 gives up,
    while status >= 3 moves on to the next API key (module-level
    ``api_number``) and starts over.

    Args:
        address: primary query string (the caller in this file passes the
            hospital name).
        address2: fallback query string (the caller passes the street
            address).

    Returns:
        dict with ``'Location'`` (a geojson ``Point`` of lng/lat),
        ``'Precise'``, ``'Confidence'`` and ``'Level'`` taken from the
        geocoder result, or ``False`` when the lookup fails or
        ``get_baidu_api`` returns False (``api_number`` is then reset to 0).

    Raises:
        requests.RequestException: if a geocoder request fails or times out.
    """
    from libs import get_baidu_api
    from urllib.parse import urlencode
    global api_number

    # str.find returns -1 on a miss, which is truthy; compare the index
    # explicitly so a missing marker does not truncate the address.
    paren_at = address2.find('（')
    if paren_at > 0:
        address2 = address2[:paren_at]
    number_at = address2.find('号')
    if number_at >= 0:
        address2 = address2[:number_at + 1]

    def geocode(query_address, ak):
        """Send one geocoder request and return the decoded JSON answer."""
        query = {
            'address': query_address,
            'output': 'json',
            'ak': ak
        }
        url = "http://api.map.baidu.com/geocoder/v2/?" + urlencode(query)
        print(url)
        return json.loads(requests.get(url, timeout=10).text)

    while True:
        baidu_api = get_baidu_api(api_number)
        # get_baidu_api is opaque from here; keep the original loose
        # comparison. NOTE(review): presumably False means "no more keys".
        if baidu_api == False:
            api_number = 0
            return False

        data = geocode(address, baidu_api)
        if data['status'] != 0:
            data = geocode(address2, baidu_api)
            status = data['status']
            if 0 < status < 3:
                return False
            if status >= 3:
                # Try again from the top with the next API key.
                api_number += 1
                continue

        result = data['result']
        return {
            'Location': Point((result['location']['lng'], result['location']['lat'])),
            'Precise': result['precise'],
            'Confidence': result['confidence'],
            'Level': result['level']
        }


'''Search by distance
db.hospital.createIndex( { "Info.AddressGeo.Location" : "2dsphere" } )

{
    "Info.AddressGeo.Location": {
       $near : {$geometry:{
            type: "Point",
            coordinates: [-84.26060492426588, 30.45023887165371]
        }}
    }
}
'''
'''Delete documents that have no location
{ "Info.AddressGeo.Location.type": { $exists: false } }
'''

# Run the full crawl when this file is executed as a script.
if __name__ == '__main__':
    get_hospitals()
